df_ger_prev <- read_csv('Germany_timeseries_prep.csv')
df_ger_prev <- df_ger_prev %>% mutate(date = as.Date(date, "%d%b%Y"),
kreis = as.character(kreis)) %>%
dplyr::select(kreis, date, rate_day)
df_ger_prev
fb_files <- list.files('../FB Data/GER individual files',
'*.csv', full.names = T)
df_ger_socdist <- fb_files %>%
map(read.csv) %>% bind_rows()
kreis_match <- read_csv('kreismatch.csv')
df_ger_socdist <- df_ger_socdist %>%
select(-polygon_name) %>%
plyr::join(kreis_match, by = 'polygon_id') %>%
select(kreis, ds, all_day_bing_tiles_visited_relative_change,
all_day_ratio_single_tile_users) %>%
rename(date = ds,
socdist_tiles = all_day_bing_tiles_visited_relative_change,
socdist_single_tile = all_day_ratio_single_tile_users) %>%
mutate(kreis = as.character(kreis),
date = as.Date(date)) %>%
arrange(kreis, date)
df_ger_socdist
NA
df_ger_pers <- read_csv('Germany_timeseries_prep.csv')
df_ger_pers <- df_ger_pers %>%
select(kreis, open, sci, extra, agree, neuro) %>%
dplyr::rename(pers_o = open,
pers_c = sci,
pers_e = extra,
pers_a = agree,
pers_n = neuro) %>%
distinct() %>%
mutate(kreis = as.character(kreis))
df_ger_pers
NA
df_ger_ctrl <- read.csv2('Germany_controls.csv', sep = ';', dec=',')
df_ger_ctrl <- df_ger_ctrl %>% select(-kreis_nme) %>%
mutate(kreis = as.character(kreis),
popdens = popdens %>%
as.character() %>%
str_replace('\\.', '')%>%
as.numeric())
df_ger_ctrl
# create sequence of dates
date_sequence <- seq.Date(min(df_ger_prev$date),
max(df_ger_prev$date), 1)
# create data frame with time sequence
df_dates = tibble(date_sequence, 1:length(date_sequence))
names(df_dates) <- c('date', 'time')
# merge prevalence data
df_ger_prev <- df_ger_prev %>%
inner_join(df_ger_pers, by = 'kreis') %>%
inner_join(df_ger_ctrl, by = 'kreis') %>%
merge(df_dates, by='date') %>%
arrange(kreis)
df_ger_prev
# create sequence of dates
date_sequence <- seq.Date(min(df_ger_socdist$date),
max(df_ger_socdist$date), 1)
# create data frame with time sequence
df_dates = tibble(date_sequence, 1:length(date_sequence))
names(df_dates) <- c('date', 'time')
# merge socdist data
df_ger_socdist <- df_ger_socdist %>%
inner_join(df_ger_pers, by = 'kreis') %>%
inner_join(df_ger_ctrl, by = 'kreis') %>%
merge(df_dates, by='date') %>%
arrange(kreis)
df_ger_socdist
NA
easter <- seq.Date(as.Date('2020-04-10'), as.Date('2020-04-13'), 1)
df_ger_loess <- df_ger_socdist %>%
mutate(weekday = format(date, '%u')) %>%
filter(!(weekday %in% c('6','7') | date %in% easter)) %>%
split(.$kreis) %>%
map(~ loess(socdist_single_tile ~ time, data = .)) %>%
map(predict, 1:max(df_ger_socdist$time)) %>%
bind_rows() %>%
gather(key = 'kreis', value = 'loess') %>%
group_by(kreis) %>%
mutate(time = row_number())
df_ger_loess_2 <- df_ger_socdist %>%
mutate(weekday = format(date, '%u')) %>%
filter(!(weekday %in% c('6','7') | date %in% easter)) %>%
split(.$kreis) %>%
map(~ loess(socdist_tiles ~ time, data = .)) %>%
map(predict, 1:max(df_ger_socdist$time)) %>%
bind_rows() %>%
gather(key = 'kreis', value = 'loess') %>%
rename(loess_2 = loess) %>%
group_by(kreis) %>%
mutate(time = row_number())
df_ger_socdist <- df_ger_socdist %>%
merge(df_ger_loess, by=c('kreis', 'time')) %>%
merge(df_ger_loess_2, by=c('kreis', 'time')) %>%
mutate(weekday = format(date, '%u')) %>%
mutate(socdist_single_tile_clean = ifelse(weekday %in% c('6','7') | date %in% easter,
loess, socdist_single_tile),
socdist_tiles_clean = ifelse(weekday %in% c('6','7') | date %in% easter,
loess_2, socdist_tiles)) %>%
arrange(kreis, time) %>%
select(-weekday)
df_ger_socdist <- df_ger_socdist %>% drop_na() %>% mutate(time = time-1)
df_ger_socdist
NA
df_ger_prev %>% ggplot(aes(x=time, y=rate_day)) +
geom_point(aes(col=kreis, size=popdens)) +
geom_smooth(method="loess", se=T) +
theme(legend.position="none") +
ggtitle("Overall prevalence over time")
pers <- c('pers_o', 'pers_c', 'pers_e', 'pers_a', 'pers_n')
for (i in pers){
gg <- df_ger_prev %>% mutate(prev_tail = cut(.[[i]],
breaks = c(-Inf, quantile(.[[i]], 0.2), quantile(.[[i]], 0.8), Inf),
labels = c('lower tail', 'center', 'upper tail'))) %>%
filter(prev_tail != 'center') %>%
ggplot(aes(x=time, y=rate_day)) +
geom_point(aes(col=kreis, size=popdens)) +
geom_smooth(method="loess", se=T) +
facet_wrap(~prev_tail) +
theme(legend.position="none") +
ggtitle(i)
print(gg)
}
df_ger_prev %>% group_by(kreis) %>%
summarize_if(is.numeric, mean) %>%
select(-kreis, -time) %>%
cor(use='pairwise.complete') %>%
round(3) %>% as.data.frame()
df_ger_socdist %>% group_by(kreis) %>%
summarize_if(is.numeric, mean) %>%
select(-kreis, -time) %>%
cor(use='pairwise.complete') %>%
round(3) %>% as.data.frame()
NA
NA
lvl2_scaled <- df_ger_prev %>%
dplyr::select(-time, -rate_day, -date) %>%
distinct() %>%
mutate_at(vars(-kreis), scale)
lvl1_scaled <- df_ger_prev %>% select(kreis, time, rate_day)
df_ger_prev_scaled <- plyr::join(lvl1_scaled, lvl2_scaled, by = 'kreis')
df_ger_prev_scaled
NA
lvl2_scaled <- df_ger_socdist %>%
dplyr::select(-time, -socdist_single_tile, -socdist_tiles, -date) %>%
distinct() %>%
mutate_at(vars(-kreis), scale)
lvl1_scaled <- df_ger_socdist %>% select(kreis, time, socdist_single_tile, socdist_tiles)
df_ger_socdist_scaled <- plyr::join(lvl1_scaled, lvl2_scaled, by = 'kreis')
df_ger_socdist_scaled
NA
# get onset day
df_ger_onset_prev <- df_ger_prev_scaled %>%
group_by(kreis) %>%
mutate(rate_cs = cumsum(rate_day)) %>%
filter(rate_cs > 0) %>%
summarize(onset_prev = min(time))
# merge with county data
df_ger_onset_prev <- df_ger_prev_scaled %>%
select(-time, -rate_day) %>%
distinct() %>%
left_join(df_ger_onset_prev, by = 'kreis')
# handle censored data
df_ger_onset_prev <- df_ger_onset_prev %>%
mutate(event = ifelse(is.na(onset_prev), 0, 1)) %>%
mutate(onset_prev = replace_na(onset_prev, as.numeric(diff(range(df_ger_prev$date)))+1))
df_ger_onset_prev
NA
# cut time series before onset
df_ger_prev_scaled <- df_ger_prev_scaled %>%
group_by(kreis) %>%
mutate(rate_cs = cumsum(rate_day)) %>%
filter(rate_cs > 0) %>%
mutate(time = time-min(time)+1) %>%
ungroup() %>%
filter(time <= 30) %>%
select(-rate_cs)
# drop counties with little data
df_ger_prev_scaled <- df_ger_prev_scaled %>%
group_by(kreis) %>%
filter(n() == 30) %>%
ungroup()
# extract slope prevalence
df_ger_slope_prev <- df_ger_prev_scaled %>%
split(.$kreis) %>%
map(~ lm(rate_day ~ time, data = .)) %>%
map(coef) %>%
map_dbl('time') %>%
as.data.frame() %>%
rownames_to_column('kreis') %>%
rename(slope_prev = '.')
# merge with county data
df_ger_slope_prev <- df_ger_prev_scaled %>%
select(-time, -rate_day) %>%
distinct() %>%
inner_join(df_ger_slope_prev, by = 'kreis') %>%
drop_na()
# standardize slopes
df_ger_slope_prev <- df_ger_slope_prev %>%
mutate(slope_prev = scale(slope_prev))
df_ger_onset_prev %>% ggplot(aes(onset_prev)) + geom_histogram()
df_ger_slope_prev %>% ggplot(aes(slope_prev)) + geom_histogram()
# predict onset from personality
cox_onset_prev <- coxph(Surv(onset_prev, event) ~
pers_o + pers_c + pers_e + pers_a + pers_n,
data = df_ger_onset_prev)
cox_onset_prev %>% summary()
Call:
coxph(formula = Surv(onset_prev, event) ~ pers_o + pers_c + pers_e +
pers_a + pers_n, data = df_ger_onset_prev)
n= 400, number of events= 400
coef exp(coef) se(coef) z Pr(>|z|)
pers_o 0.131653 1.140712 0.051953 2.534 0.0113 *
pers_c -0.104500 0.900775 0.053314 -1.960 0.0500 *
pers_e 0.056186 1.057794 0.052903 1.062 0.2882
pers_a -0.008624 0.991413 0.049615 -0.174 0.8620
pers_n -0.276799 0.758207 0.057728 -4.795 1.63e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
pers_o 1.1407 0.8766 1.0303 1.263
pers_c 0.9008 1.1102 0.8114 1.000
pers_e 1.0578 0.9454 0.9536 1.173
pers_a 0.9914 1.0087 0.8995 1.093
pers_n 0.7582 1.3189 0.6771 0.849
Concordance= 0.592 (se = 0.018 )
Likelihood ratio test= 39.92 on 5 df, p=2e-07
Wald test = 37.99 on 5 df, p=4e-07
Score (logrank) test = 37.99 on 5 df, p=4e-07
# predict onset from personality with controls
cox_onset_prev_ctrl <- coxph(Surv(onset_prev, event) ~
pers_o + pers_c + pers_e + pers_a + pers_n +
women + academics + afd + hospital_beds +
tourism_beds + gdp + manufact + airport + age +
popdens,
data = df_ger_onset_prev)
cox_onset_prev_ctrl %>% summary()
Call:
coxph(formula = Surv(onset_prev, event) ~ pers_o + pers_c + pers_e +
pers_a + pers_n + women + academics + afd + hospital_beds +
tourism_beds + gdp + manufact + airport + age + popdens,
data = df_ger_onset_prev)
n= 392, number of events= 392
(8 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
pers_o 0.050510 1.051807 0.059853 0.844 0.398726
pers_c -0.087556 0.916168 0.061192 -1.431 0.152473
pers_e 0.038883 1.039649 0.057895 0.672 0.501825
pers_a 0.008022 1.008054 0.057017 0.141 0.888110
pers_n -0.222357 0.800630 0.063469 -3.503 0.000459 ***
women 0.066190 1.068429 0.067498 0.981 0.326785
academics 0.267424 1.306595 0.094669 2.825 0.004730 **
afd -0.072815 0.929773 0.075881 -0.960 0.337260
hospital_beds -0.236175 0.789642 0.069836 -3.382 0.000720 ***
tourism_beds 0.076981 1.080022 0.054241 1.419 0.155829
gdp -0.175570 0.838979 0.115831 -1.516 0.129586
manufact 0.147898 1.159395 0.094458 1.566 0.117404
airport -0.171432 0.842457 0.061687 -2.779 0.005451 **
age -0.176055 0.838572 0.085439 -2.061 0.039343 *
popdens 0.062986 1.065012 0.082801 0.761 0.446841
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
pers_o 1.0518 0.9507 0.9354 1.1827
pers_c 0.9162 1.0915 0.8126 1.0329
pers_e 1.0396 0.9619 0.9281 1.1646
pers_a 1.0081 0.9920 0.9015 1.1272
pers_n 0.8006 1.2490 0.7070 0.9067
women 1.0684 0.9360 0.9360 1.2196
academics 1.3066 0.7653 1.0853 1.5730
afd 0.9298 1.0755 0.8013 1.0789
hospital_beds 0.7896 1.2664 0.6886 0.9055
tourism_beds 1.0800 0.9259 0.9711 1.2012
gdp 0.8390 1.1919 0.6686 1.0528
manufact 1.1594 0.8625 0.9634 1.3952
airport 0.8425 1.1870 0.7465 0.9507
age 0.8386 1.1925 0.7093 0.9914
popdens 1.0650 0.9390 0.9055 1.2527
Concordance= 0.658 (se = 0.016 )
Likelihood ratio test= 101.3 on 15 df, p=7e-15
Wald test = 98.22 on 15 df, p=3e-14
Score (logrank) test = 102.1 on 15 df, p=5e-15
# predict slopes from personality
lm_slope_prev <- lm(slope_prev ~ pers_o + pers_c + pers_e + pers_a + pers_n,
data = df_ger_slope_prev)
lm_slope_prev %>% summary()
Call:
lm(formula = slope_prev ~ pers_o + pers_c + pers_e + pers_a +
pers_n, data = df_ger_slope_prev)
Residuals:
Min 1Q Median 3Q Max
-1.1994 -0.5725 -0.1888 0.2940 8.6104
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.00174 0.05054 0.034 0.973
pers_o -0.03181 0.05420 -0.587 0.558
pers_c -0.05989 0.05799 -1.033 0.302
pers_e 0.07814 0.05745 1.360 0.175
pers_a 0.09131 0.05839 1.564 0.119
pers_n 0.06209 0.05971 1.040 0.299
Residual standard error: 1 on 386 degrees of freedom
Multiple R-squared: 0.01215, Adjusted R-squared: -0.0006472
F-statistic: 0.9494 on 5 and 386 DF, p-value: 0.4489
lm_slope_prev %>% confint(level=0.9)
5 % 95 %
(Intercept) -0.081594195 0.08507360
pers_o -0.121173276 0.05756145
pers_c -0.155494307 0.03572519
pers_e -0.016586494 0.17286042
pers_a -0.004968057 0.18758968
pers_n -0.036363942 0.16053810
# predict slopes from personality with controls
lm_slope_prev_ctrl <- lm(slope_prev ~
pers_o + pers_c + pers_e + pers_a + pers_n +
women + academics + afd + hospital_beds +
tourism_beds + gdp + manufact + airport + age +
popdens,
data = df_ger_slope_prev)
lm_slope_prev_ctrl %>% summary()
Call:
lm(formula = slope_prev ~ pers_o + pers_c + pers_e + pers_a +
pers_n + women + academics + afd + hospital_beds + tourism_beds +
gdp + manufact + airport + age + popdens, data = df_ger_slope_prev)
Residuals:
Min 1Q Median 3Q Max
-1.4465 -0.5788 -0.0848 0.3212 8.0256
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.0005982 0.0488695 -0.012 0.9902
pers_o -0.0160080 0.0603371 -0.265 0.7909
pers_c 0.0018010 0.0584550 0.031 0.9754
pers_e 0.0769996 0.0572311 1.345 0.1793
pers_a 0.0832263 0.0586703 1.419 0.1569
pers_n 0.0621839 0.0593346 1.048 0.2953
women 0.1316729 0.0715869 1.839 0.0667 .
academics -0.0618388 0.0955468 -0.647 0.5179
afd -0.0225847 0.0712477 -0.317 0.7514
hospital_beds 0.0939475 0.0657371 1.429 0.1538
tourism_beds -0.0400654 0.0550991 -0.727 0.4676
gdp -0.0130746 0.1126142 -0.116 0.9076
manufact 0.1729185 0.0894459 1.933 0.0540 .
airport 0.1503036 0.0612082 2.456 0.0145 *
age -0.1912130 0.0908032 -2.106 0.0359 *
popdens -0.1393613 0.0790525 -1.763 0.0787 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9665 on 376 degrees of freedom
Multiple R-squared: 0.1016, Adjusted R-squared: 0.06581
F-statistic: 2.836 on 15 and 376 DF, p-value: 0.0003208
lm_slope_prev_ctrl %>% confint(level=0.9)
5 % 95 %
(Intercept) -0.08118002 0.079983552
pers_o -0.11549884 0.083482831
pers_c -0.09458638 0.098188366
pers_e -0.01736960 0.171368869
pers_a -0.01351600 0.179968683
pers_n -0.03565397 0.160021700
women 0.01363199 0.249713732
academics -0.21938756 0.095709935
afd -0.14006616 0.094896812
hospital_beds -0.01444736 0.202342442
tourism_beds -0.13091924 0.050788398
gdp -0.19876601 0.172616820
manufact 0.02542968 0.320407240
airport 0.04937648 0.251230766
age -0.34093993 -0.041486083
popdens -0.26971227 -0.009010392
ctrls <- cforest_unbiased(ntree=500, mtry=5)
crf_slope_prev <- cforest(slope_prev ~
pers_o + pers_c + pers_e + pers_a + pers_n +
women + academics + afd + hospital_beds +
tourism_beds + gdp + manufact + airport + age +
popdens,
data = df_ger_slope_prev,
controls = ctrls)
crf_slope_prev_varimp <- varimp(crf_slope_prev, nperm = 1)
crf_slope_prev_varimp_cond <- varimp(crf_slope_prev, conditional = T, nperm = 1)
crf_slope_prev_varimp
pers_o pers_c pers_e pers_a pers_n women
0.0051404436 0.0002964524 0.0008458686 0.0057439132 0.0033583582 0.0073071591
academics afd hospital_beds tourism_beds gdp manufact
0.0067329183 0.0153579773 0.0203683738 -0.0008152542 0.0229100482 0.0386981605
airport age popdens
0.0264143067 0.0168837599 0.0144487764
crf_slope_prev_varimp %>% as.data.frame() %>%
rownames_to_column('variable') %>%
ggplot(aes(x=variable, y=.)) +
geom_bar(stat = 'identity') +
theme(axis.text.x = element_text(angle = 90))
crf_slope_prev_varimp_cond
pers_o pers_c pers_e pers_a pers_n women
0.0057961897 0.0016406381 0.0006215783 0.0061210301 0.0049832188 0.0146608870
academics afd hospital_beds tourism_beds gdp manufact
0.0056496432 0.0182956022 0.0217659545 0.0008632994 0.0275604756 0.0349663348
airport age popdens
0.0191283046 0.0256610220 0.0180434714
crf_slope_prev_varimp_cond %>% as.data.frame() %>%
rownames_to_column('variable') %>%
ggplot(aes(x=variable, y=.)) +
geom_bar(stat = 'identity') +
theme(axis.text.x = element_text(angle = 90))
lm_meandiff_socdist <- lm(mean_diff_socdist ~
pers_o + pers_c + pers_e + pers_a + pers_n,
data = df_ger_cpt_socdist)
lm_meandiff_socdist %>% summary()
Call:
lm(formula = mean_diff_socdist ~ pers_o + pers_c + pers_e + pers_a +
pers_n, data = df_ger_cpt_socdist)
Residuals:
Min 1Q Median 3Q Max
-2.9088 -0.5744 -0.0379 0.5828 3.9301
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.159e-16 4.626e-02 0.000 1.0000
pers_o 3.738e-01 4.945e-02 7.559 3.04e-13 ***
pers_c -8.195e-02 5.263e-02 -1.557 0.1203
pers_e 7.612e-02 5.251e-02 1.450 0.1480
pers_a -2.138e-03 5.355e-02 -0.040 0.9682
pers_n -9.522e-02 5.456e-02 -1.745 0.0817 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9112 on 382 degrees of freedom
Multiple R-squared: 0.1804, Adjusted R-squared: 0.1697
F-statistic: 16.81 on 5 and 382 DF, p-value: 5.023e-15
lm_meandiff_socdist %>% confint(level=0.9)
5 % 95 %
(Intercept) -0.07627701 0.076277015
pers_o 0.29225959 0.455325715
pers_c -0.16872746 0.004834687
pers_e -0.01046647 0.162698671
pers_a -0.09042850 0.086151871
pers_n -0.18518596 -0.005261399
lm_meandiff_socdist_ctrl <- lm(mean_diff_socdist ~
pers_o + pers_c + pers_e + pers_a + pers_n +
women + academics + afd + hospital_beds +
tourism_beds + gdp + manufact + airport + age +
popdens + onset_prev + slope_prev,
data = df_ger_cpt_socdist)
lm_meandiff_socdist_ctrl %>% summary()
Call:
lm(formula = mean_diff_socdist ~ pers_o + pers_c + pers_e + pers_a +
pers_n + women + academics + afd + hospital_beds + tourism_beds +
gdp + manufact + airport + age + popdens + onset_prev + slope_prev,
data = df_ger_cpt_socdist)
Residuals:
Min 1Q Median 3Q Max
-2.87447 -0.43609 -0.04652 0.40255 1.91077
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.272459 0.192825 1.413 0.1585
pers_o 0.091983 0.041734 2.204 0.0281 *
pers_c 0.022486 0.040543 0.555 0.5795
pers_e -0.022524 0.040138 -0.561 0.5750
pers_a -0.007898 0.041152 -0.192 0.8479
pers_n 0.018154 0.041694 0.435 0.6635
women 0.108055 0.049875 2.167 0.0309 *
academics 0.108896 0.063995 1.702 0.0897 .
afd -0.094799 0.050150 -1.890 0.0595 .
hospital_beds -0.106259 0.044865 -2.368 0.0184 *
tourism_beds 0.024920 0.038925 0.640 0.5224
gdp 0.310743 0.076923 4.040 6.51e-05 ***
manufact -0.037805 0.063291 -0.597 0.5507
airport -0.032251 0.043407 -0.743 0.4580
age -0.272654 0.063959 -4.263 2.56e-05 ***
popdens 0.103290 0.054637 1.890 0.0595 .
onset_prev -0.004866 0.003387 -1.436 0.1517
slope_prev 0.211525 0.040301 5.249 2.59e-07 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.6717 on 370 degrees of freedom
Multiple R-squared: 0.5686, Adjusted R-squared: 0.5488
F-statistic: 28.68 on 17 and 370 DF, p-value: < 2.2e-16
lm_meandiff_socdist_ctrl %>% confint(level=0.9)
5 % 95 %
(Intercept) -0.04550617 0.590424375
pers_o 0.02316347 0.160801944
pers_c -0.04436977 0.089341100
pers_e -0.08870971 0.043662416
pers_a -0.07575735 0.059960924
pers_n -0.05059917 0.086906294
women 0.02581175 0.190297877
academics 0.00336963 0.214422260
afd -0.17749642 -0.012102299
hospital_beds -0.18023974 -0.032277423
tourism_beds -0.03926581 0.089105948
gdp 0.18389830 0.437587645
manufact -0.14217100 0.066561843
airport -0.10382845 0.039327244
age -0.37812107 -0.167186538
popdens 0.01319502 0.193384651
onset_prev -0.01045107 0.000720025
slope_prev 0.14506839 0.277981025
ctrls <- cforest_unbiased(ntree=500, mtry=5)
crf_meandiff_socdist <- cforest(mean_diff_socdist ~
pers_o + pers_c + pers_e + pers_a + pers_n +
women + academics + afd + hospital_beds +
tourism_beds + gdp + manufact + airport + age +
popdens + onset_prev + slope_prev,
data = df_ger_cpt_socdist %>% drop_na(),
controls = ctrls)
crf_meandiff_socdist_varimp <- varimp(crf_meandiff_socdist, nperm = 1)
crf_meandiff_socdist_varimp_cond <- varimp(crf_meandiff_socdist, conditional = T, nperm = 1)
crf_meandiff_socdist_varimp
pers_o pers_c pers_e pers_a pers_n women
1.759975e-02 6.337446e-05 2.610670e-03 1.835394e-04 9.390769e-04 7.905424e-03
academics afd hospital_beds tourism_beds gdp manufact
1.577308e-01 1.241787e-01 7.915686e-03 1.073799e-03 1.369025e-01 8.551752e-03
airport age popdens onset_prev slope_prev
8.763834e-03 2.131369e-01 7.312826e-02 9.758102e-04 1.419053e-02
crf_meandiff_socdist_varimp %>% as.data.frame() %>%
rownames_to_column('variable') %>%
ggplot(aes(x=variable, y=.)) +
geom_bar(stat = 'identity') +
theme(axis.text.x = element_text(angle = 90))
crf_meandiff_socdist_varimp_cond
pers_o pers_c pers_e pers_a pers_n women
0.0180832096 -0.0014234895 0.0014359473 -0.0005199509 0.0002390713 0.0070661971
academics afd hospital_beds tourism_beds gdp manufact
0.1595062896 0.1220725600 0.0058460091 0.0030843814 0.1339119408 0.0115030151
airport age popdens onset_prev slope_prev
0.0095423799 0.2094029437 0.0710187422 0.0013900179 0.0152997826
crf_meandiff_socdist_varimp_cond %>% as.data.frame() %>%
rownames_to_column('variable') %>%
ggplot(aes(x=variable, y=.)) +
geom_bar(stat = 'identity') +
theme(axis.text.x = element_text(angle = 90))
# predict hazard from personality
cox_cpt_socdist_2 <- coxph(Surv(cpt_day_socdist_2, event) ~
pers_o + pers_c + pers_e + pers_a + pers_n,
data = df_ger_cpt_socdist)
cox_cpt_socdist_2 %>% summary()
Call:
coxph(formula = Surv(cpt_day_socdist_2, event) ~ pers_o + pers_c +
pers_e + pers_a + pers_n, data = df_ger_cpt_socdist)
n= 388, number of events= 388
coef exp(coef) se(coef) z Pr(>|z|)
pers_o -0.03062 0.96985 0.05543 -0.552 0.581
pers_c -0.08996 0.91397 0.05478 -1.642 0.101
pers_e -0.01517 0.98495 0.05758 -0.263 0.792
pers_a -0.04174 0.95912 0.05991 -0.697 0.486
pers_n -0.06473 0.93732 0.05898 -1.098 0.272
exp(coef) exp(-coef) lower .95 upper .95
pers_o 0.9698 1.031 0.8700 1.081
pers_c 0.9140 1.094 0.8209 1.018
pers_e 0.9849 1.015 0.8798 1.103
pers_a 0.9591 1.043 0.8529 1.079
pers_n 0.9373 1.067 0.8350 1.052
Concordance= 0.542 (se = 0.026 )
Likelihood ratio test= 5.04 on 5 df, p=0.4
Wald test = 5.17 on 5 df, p=0.4
Score (logrank) test = 5.15 on 5 df, p=0.4
# predict hazard from personality with controls
cox_cpt_socdist_ctrl_2 <- coxph(Surv(cpt_day_socdist_2, event) ~
pers_o + pers_c + pers_e + pers_a + pers_n +
women + academics + afd + hospital_beds +
tourism_beds + gdp + manufact + airport + age +
popdens + onset_prev + slope_prev,
data = df_ger_cpt_socdist)
cox_cpt_socdist_ctrl_2 %>% summary()
Call:
coxph(formula = Surv(cpt_day_socdist_2, event) ~ pers_o + pers_c +
pers_e + pers_a + pers_n + women + academics + afd + hospital_beds +
tourism_beds + gdp + manufact + airport + age + popdens +
onset_prev + slope_prev, data = df_ger_cpt_socdist)
n= 388, number of events= 388
coef exp(coef) se(coef) z Pr(>|z|)
pers_o -0.076112 0.926712 0.065660 -1.159 0.24638
pers_c -0.020391 0.979816 0.059956 -0.340 0.73378
pers_e -0.065070 0.937002 0.063923 -1.018 0.30870
pers_a -0.018041 0.982121 0.063163 -0.286 0.77517
pers_n -0.010432 0.989622 0.066307 -0.157 0.87499
women 0.124722 1.132833 0.080148 1.556 0.11967
academics -0.020774 0.979441 0.102622 -0.202 0.83958
afd -0.245341 0.782438 0.078871 -3.111 0.00187 **
hospital_beds -0.121340 0.885733 0.066852 -1.815 0.06952 .
tourism_beds 0.042779 1.043707 0.058321 0.734 0.46325
gdp -0.102634 0.902457 0.122794 -0.836 0.40326
manufact -0.044267 0.956699 0.100845 -0.439 0.66069
airport 0.058871 1.060638 0.068799 0.856 0.39217
age -0.068118 0.934150 0.099304 -0.686 0.49274
popdens 0.181209 1.198666 0.085109 2.129 0.03324 *
onset_prev -0.013244 0.986844 0.005022 -2.637 0.00836 **
slope_prev 0.139626 1.149843 0.063239 2.208 0.02725 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
pers_o 0.9267 1.0791 0.8148 1.0540
pers_c 0.9798 1.0206 0.8712 1.1020
pers_e 0.9370 1.0672 0.8267 1.0621
pers_a 0.9821 1.0182 0.8678 1.1116
pers_n 0.9896 1.0105 0.8690 1.1270
women 1.1328 0.8827 0.9682 1.3255
academics 0.9794 1.0210 0.8010 1.1977
afd 0.7824 1.2781 0.6704 0.9132
hospital_beds 0.8857 1.1290 0.7770 1.0097
tourism_beds 1.0437 0.9581 0.9310 1.1701
gdp 0.9025 1.1081 0.7094 1.1480
manufact 0.9567 1.0453 0.7851 1.1658
airport 1.0606 0.9428 0.9268 1.2138
age 0.9342 1.0705 0.7689 1.1349
popdens 1.1987 0.8343 1.0145 1.4163
onset_prev 0.9868 1.0133 0.9772 0.9966
slope_prev 1.1498 0.8697 1.0158 1.3016
Concordance= 0.659 (se = 0.029 )
Likelihood ratio test= 64.35 on 17 df, p=2e-07
Wald test = 60.37 on 17 df, p=9e-07
Score (logrank) test = 61.6 on 17 df, p=6e-07
lm_meandiff_socdist_2 <- lm(mean_diff_socdist_2 ~
pers_o + pers_c + pers_e + pers_a + pers_n,
data = df_ger_cpt_socdist)
lm_meandiff_socdist_2 %>% summary()
Call:
lm(formula = mean_diff_socdist_2 ~ pers_o + pers_c + pers_e +
pers_a + pers_n, data = df_ger_cpt_socdist)
Residuals:
Min 1Q Median 3Q Max
-3.4864 -0.6476 0.0330 0.6232 3.9993
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.163e-17 4.899e-02 0.000 1.000
pers_o 2.149e-01 5.236e-02 4.103 4.98e-05 ***
pers_c -2.038e-02 5.573e-02 -0.366 0.715
pers_e 5.729e-02 5.560e-02 1.030 0.303
pers_a 8.964e-02 5.670e-02 1.581 0.115
pers_n -5.135e-02 5.777e-02 -0.889 0.375
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9649 on 382 degrees of freedom
Multiple R-squared: 0.08099, Adjusted R-squared: 0.06896
F-statistic: 6.733 on 5 and 382 DF, p-value: 4.976e-06
lm_meandiff_socdist_2 %>% confint(level=0.9)
5 % 95 %
(Intercept) -0.080769970 0.08076997
pers_o 0.128518892 0.30119012
pers_c -0.112269977 0.07151552
pers_e -0.034389315 0.14897579
pers_a -0.003848162 0.18313334
pers_n -0.146606823 0.04391585
lm_meandiff_socdist_ctrl_2 <- lm(mean_diff_socdist_2 ~
pers_o + pers_c + pers_e + pers_a + pers_n +
women + academics + afd + hospital_beds +
tourism_beds + gdp + manufact + airport + age +
popdens + onset_prev + slope_prev,
data = df_ger_cpt_socdist)
lm_meandiff_socdist_ctrl_2 %>% summary()
Call:
lm(formula = mean_diff_socdist_2 ~ pers_o + pers_c + pers_e +
pers_a + pers_n + women + academics + afd + hospital_beds +
tourism_beds + gdp + manufact + airport + age + popdens +
onset_prev + slope_prev, data = df_ger_cpt_socdist)
Residuals:
Min 1Q Median 3Q Max
-3.3284 -0.5947 -0.0486 0.4943 2.6870
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.1053843 0.2498635 0.422 0.67344
pers_o 0.0505085 0.0540795 0.934 0.35093
pers_c 0.0006957 0.0525363 0.013 0.98944
pers_e -0.0061037 0.0520103 -0.117 0.90664
pers_a 0.0468739 0.0533251 0.879 0.37996
pers_n 0.0156088 0.0540273 0.289 0.77281
women 0.1236199 0.0646283 1.913 0.05655 .
academics 0.1883589 0.0829247 2.271 0.02369 *
afd 0.0381998 0.0649850 0.588 0.55701
hospital_beds -0.2356647 0.0581359 -4.054 6.15e-05 ***
tourism_beds -0.0174050 0.0504386 -0.345 0.73024
gdp 0.2701306 0.0996771 2.710 0.00704 **
manufact -0.1433300 0.0820132 -1.748 0.08136 .
airport -0.0448547 0.0562473 -0.797 0.42570
age -0.1268930 0.0828783 -1.531 0.12660
popdens -0.0209434 0.0707983 -0.296 0.76753
onset_prev -0.0018842 0.0043892 -0.429 0.66797
slope_prev 0.2389105 0.0522227 4.575 6.51e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.8704 on 370 degrees of freedom
Multiple R-squared: 0.2756, Adjusted R-squared: 0.2423
F-statistic: 8.281 on 17 and 370 DF, p-value: < 2.2e-16
lm_meandiff_socdist_ctrl_2 %>% confint(level=0.9)
5 % 95 %
(Intercept) -0.306636205 0.517404846
pers_o -0.038667716 0.139684709
pers_c -0.085935802 0.087327214
pers_e -0.091867797 0.079660466
pers_a -0.041058174 0.134806050
pers_n -0.073481190 0.104698880
women 0.017049040 0.230190746
academics 0.051617574 0.325100319
afd -0.068959305 0.145358984
hospital_beds -0.331529771 -0.139799674
tourism_beds -0.100577292 0.065767285
gdp 0.105764825 0.434496381
manufact -0.278568362 -0.008091609
airport -0.137605486 0.047896166
age -0.263557896 0.009771815
popdens -0.137688624 0.095801741
onset_prev -0.009121997 0.005353544
slope_prev 0.152796191 0.325024849
ctrls <- cforest_unbiased(ntree=500, mtry=5)
crf_meandiff_socdist_2 <- cforest(mean_diff_socdist_2 ~
pers_o + pers_c + pers_e + pers_a + pers_n +
women + academics + afd + hospital_beds +
tourism_beds + gdp + manufact + airport + age +
popdens + onset_prev + slope_prev,
data = df_ger_cpt_socdist %>% drop_na(),
controls = ctrls)
crf_meandiff_socdist_varimp <- varimp(crf_meandiff_socdist_2, nperm = 1)
crf_meandiff_socdist_varimp_cond <- varimp(crf_meandiff_socdist_2, conditional = T, nperm = 1)
crf_meandiff_socdist_varimp
pers_o pers_c pers_e pers_a pers_n women
0.0061045393 -0.0024349997 0.0069040770 0.0083183883 0.0009496786 0.0045345045
academics afd hospital_beds tourism_beds gdp manufact
0.1876559130 0.0277172348 0.0269946837 0.0018742661 0.0361798105 0.0179608113
airport age popdens onset_prev slope_prev
0.0059095875 0.0536333125 0.0187286793 -0.0006441898 0.0477348349
crf_meandiff_socdist_varimp %>% as.data.frame() %>%
rownames_to_column('variable') %>%
ggplot(aes(x=variable, y=.)) +
geom_bar(stat = 'identity') +
theme(axis.text.x = element_text(angle = 90))
crf_meandiff_socdist_varimp_cond
pers_o pers_c pers_e pers_a pers_n women
0.0055679584 -0.0021325301 0.0026940890 0.0076168975 -0.0004095212 0.0059936567
academics afd hospital_beds tourism_beds gdp manufact
0.1855315339 0.0303020488 0.0293348797 0.0041487670 0.0301259137 0.0205806193
airport age popdens onset_prev slope_prev
0.0045007395 0.0512203214 0.0193736309 0.0011647302 0.0418606350
crf_meandiff_socdist_varimp_cond %>% as.data.frame() %>%
rownames_to_column('variable') %>%
ggplot(aes(x=variable, y=.)) +
geom_bar(stat = 'identity') +
theme(axis.text.x = element_text(angle = 90))
ger_list_results <- list(cox_onset_prev, cox_onset_prev_ctrl,
lm_slope_prev, lm_slope_prev_ctrl,
cox_cpt_socdist, cox_cpt_socdist_ctrl,
lm_meandiff_socdist, lm_meandiff_socdist_ctrl,
cox_cpt_socdist_2, cox_cpt_socdist_ctrl_2,
lm_meandiff_socdist_2, lm_meandiff_socdist_ctrl_2)
results_names <- list('cox_onset_prev', 'cox_onset_prev_ctrl',
'lm_slope_prev', 'lm_slope_prev_ctrl',
'cox_cpt_socdist', 'cox_cpt_socdist_ctrl',
'lm_meandiff_socdist', 'lm_meandiff_socdist_ctrl',
'cox_cpt_socdist_2', 'cox_cpt_socdist_ctrl_2',
'lm_meandiff_socdist_2', 'lm_meandiff_socdist_ctrl_2')
names(ger_list_results) <- results_names
save(ger_list_results, file="ger_list_results.RData")
write_csv(df_ger_slope_prev, '/Users/hp2500/Google Drive/STUDY/Columbia/Research/Corona/Delivery/df_ger_slope_prev.csv')
write_csv(df_ger_cpt_socdist, '/Users/hp2500/Google Drive/STUDY/Columbia/Research/Corona/Delivery/df_ger_cpt_socdist.csv')
write_csv(df_ger_slope_prev, '/Users/hp2500/Google Drive/STUDY/Columbia/Research/Corona/Delivery/df_ger_slope_prev.csv')
write_csv(df_ger_cpt_socdist, '/Users/hp2500/Google Drive/STUDY/Columbia/Research/Corona/Delivery/df_ger_cpt_socdist.csv')